# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#  file name:	local-news-recoding.R
#  date:	June 9, 2020
#  author: Bernhard Clemm von Hohenberg
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# SETUP  ####
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Absolute project directories on the author's machine; adjust before running
# elsewhere. Each path ends with "/" because file names are appended to it
# with paste0() further down.
data_path <- "/Users/bernhardclemm/Dropbox/Mac/Documents/Academia/EXPO/repositories/EXPO2.0/Projects/Local-News/data/"
# NOTE(review): output_path is not referenced in the code shown here
output_path <- "/Users/bernhardclemm/Dropbox/Mac/Documents/Academia/EXPO/repositories/EXPO2.0/Projects/Local-News/output/"
code_path <- "/Users/bernhardclemm/Dropbox/Mac/Documents/Academia/EXPO/repositories/EXPO2.0/Projects/Local-News/code/"

library(tidyverse)

# Provides the recode_US_w*() and join_US_waves() helpers called below
# (presumably defined in this file -- they are not defined in this script)
source(paste0(code_path, "survey-recoding-functions_US.R"))

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# SURVEY DATA  ####
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Read each raw wave file and recode it right away.
# Wave 0 comes in two raw files (lucid, qualtrics); both go through
# recode_US_w0().
us_w0a <- recode_US_w0(
  read.csv(paste0(data_path, "surveys/US_survey_w0_lucid_raw.csv")))
us_w0b <- recode_US_w0(
  read.csv(paste0(data_path, "surveys/US_survey_w0_qualtrics_raw.csv")))
us_w1 <- recode_US_w1(
  read.csv(paste0(data_path, "surveys/US_survey_w1_raw.csv")))
us_w2 <- recode_US_w2(
  read.csv(paste0(data_path, "surveys/US_survey_w2_raw.csv")))
us_w3 <- recode_US_w3(
  read.csv(paste0(data_path, "surveys/US_survey_w3_raw.csv")))

# Separately stored wave-0 sociodemographics, passed to the join unrecoded
missing_sociodem <- read.csv(
  paste0(data_path, "surveys/us_missing_w0_sociodems.csv"))

# Combine all waves into one survey data set
survey_data <- join_US_waves(
  wave1 = us_w1, wave2 = us_w2, wave3 = us_w3,
  wave0a = us_w0a, wave0b = us_w0b,
  w1_missing_sociodem = missing_sociodem)

# The per-wave objects are no longer needed once joined
rm(us_w0a, us_w0b, us_w1, us_w2, us_w3, missing_sociodem)

# Further recoding

## Define some variable sets

# Standardised affective-polarization items (feeling thermometer plus the
# mean / selfish / stupid trait ratings of the opposing party), one set per wave
ap_w1_vars <- paste0(
  c("ft", "mean", "selfish", "stupid"), "_party_opp_w1_stand")

ap_w2_vars <- paste0(
  c("ft", "mean", "selfish", "stupid"), "_party_opp_w2_stand")

ap_w3_vars <- paste0(
  c("ft", "mean", "selfish", "stupid"), "_party_opp_w3_stand")

# Folded attitude items: four topics x three items each, one set per wave.
# rep(..., each = 3) combined with the recycled 1:3 yields
# econ_1, econ_2, econ_3, climate_1, ... in that order.
attextr_w1_vars <- paste0(
  "att_", rep(c("econ", "climate", "immigrant", "gun"), each = 3),
  "_", 1:3, "_w1_fold")

attextr_w2_vars <- paste0(
  "att_", rep(c("econ", "climate", "immigrant", "gun"), each = 3),
  "_", 1:3, "_w2_fold")

attextr_w3_vars <- paste0(
  "att_", rep(c("econ", "climate", "immigrant", "gun"), each = 3),
  "_", 1:3, "_w3_fold")

# Row-wise sum over a set of item columns that skips missing items but stays
# NA (instead of 0) for respondents who answered none of the items.
row_sum_available <- function(items) {
  sums <- rowSums(items, na.rm = TRUE)
  sums[rowSums(!is.na(items)) == 0] <- NA_real_
  sums
}

survey_data <- survey_data %>%

  # Dependent variables

  ## Intended political participation
  ## (sum of all "part_*" items of the wave; na.rm used to be handed to
  ## across() instead of rowSums(), where it was silently ignored, so a single
  ## missing item turned the whole score into NA)
  mutate(
    participation_w2 = row_sum_available(
      select(., starts_with("part_") & ends_with("w2"))),
    participation_w3 = row_sum_available(
      select(., starts_with("part_") & ends_with("w3")))) %>%

  ## Affective polarization
  mutate(
    # Three-category party identification from the 7-point item (4 = middle)
    party_w2_cat = case_when(
      party_w2 < 4 ~ "Democrat",
      party_w2 == 4 ~ "Independent",
      party_w2 > 4 ~ "Republican"),
    party_w3_cat = case_when(
      party_w3 < 4 ~ "Democrat",
      party_w3 == 4 ~ "Independent",
      party_w3 > 4 ~ "Republican"),

    # Thermometer towards supporters of the opposing party; respondents in the
    # "Independent" category match no branch and therefore get NA.
    # NOTE(review): party_w0_cat is read here but only (re)defined further
    # down in this pipeline, so this relies on the column already existing in
    # survey_data -- confirm it is created by the wave-0 recoding.
    ft_party_opp_w1 = case_when(
      party_w0_cat == "Democrat" ~ ft_rep_supporters_w1,
      party_w0_cat == "Republican" ~ ft_dem_supporters_w1),
    # Sign-flip the trait ratings (presumably so that they point in the same
    # direction as the thermometer -- confirm against the questionnaire)
    mean_party_opp_w1 = mean_party_opp_w1*(-1),
    selfish_party_opp_w1 = selfish_party_opp_w1*(-1),
    stupid_party_opp_w1 = stupid_party_opp_w1*(-1),

    ft_party_opp_w2 = case_when(
      party_w2_cat == "Democrat" ~ ft_rep_supporters_w2,
      party_w2_cat == "Republican" ~ ft_dem_supporters_w2),
    mean_party_opp_w2 = mean_party_opp_w2*(-1),
    selfish_party_opp_w2 = selfish_party_opp_w2*(-1),
    stupid_party_opp_w2 = stupid_party_opp_w2*(-1),

    ft_party_opp_w3 = case_when(
      party_w3_cat == "Democrat" ~ ft_rep_supporters_w3,
      party_w3_cat == "Republican" ~ ft_dem_supporters_w3),
    # Only the wave-3 thermometer is standardised within this pipeline
    ft_party_opp_w3_stand = scale(ft_party_opp_w3),
    mean_party_opp_w3 = mean_party_opp_w3*(-1),
    selfish_party_opp_w3 = selfish_party_opp_w3*(-1),
    stupid_party_opp_w3 = stupid_party_opp_w3*(-1)) %>%

  ## Attitude extremity
  ## (fold every "att_*" item at 7, i.e. absolute distance from 7, and store it
  ## as "<item>_fold"; assumes 7 is the scale midpoint -- TODO confirm)
  mutate(
    across(starts_with("att_") & ends_with("_w1"),
           ~ abs(. - 7), .names = "{.col}_fold"),
    across(starts_with("att_") & ends_with("_w2"),
           ~ abs(. - 7), .names = "{.col}_fold"),
    across(starts_with("att_") & ends_with("_w3"),
           ~ abs(. - 7), .names = "{.col}_fold")) %>%
  # Per-wave extremity index: mean of the available folded items
  mutate(
    attextr_w1 = select(., all_of(attextr_w1_vars)) %>%
      rowMeans(na.rm = TRUE),
    attextr_w2 = select(., all_of(attextr_w2_vars)) %>%
      rowMeans(na.rm = TRUE),
    attextr_w3 = select(., all_of(attextr_w3_vars)) %>%
      rowMeans(na.rm = TRUE)) %>%

  ## Political knowledge (W3): mean of the four knowledge items
  mutate(
    polknow_w3 = select(., c(polknow_pres_w3, polknow_majority_w3,
                             polknow_senator_w3, polknow_spend_w3)) %>%
      rowMeans(na.rm = TRUE)) %>%

  # Independent variables

  ## Education
  mutate(
    # Binary split of the education code at 8/9
    edu_high_w0 = case_when(
      edu_w0 <= 8 ~ 0,
      edu_w0 >= 9 ~ 1),
    edu_high_w0_fac = factor(case_when(
      edu_w0 <= 8 ~ "Low",
      edu_w0 >= 9 ~ "High")),
    # Five-category version; codes not listed below end up as NA
    edu_w0_fac_5 = case_when(
      edu_w0 %in% c(1, 5, 6) ~ "Education: Less than high school",
      edu_w0 %in% c(8) ~ "Education: High school",
      edu_w0 %in% c(9, 10, 11) ~ "Education: Junior college",
      edu_w0 %in% c(12, 13) ~ "Education: Bachelor",
      edu_w0 %in% c(14, 15) ~ "Education: Graduate school"),
    # Levels sort alphabetically ("High", "Low"); ref = 2 makes "Low" the
    # reference level
    edu_high_w0_fac = relevel(edu_high_w0_fac, ref = 2),

    ## Age
    age_w0_cat = case_when(
      age_w0 >= 18 & age_w0 < 25 ~ "Age: 18–24",
      age_w0 >= 25 & age_w0 < 55 ~ "Age: 25–54",
      age_w0 >= 55 & age_w0 < 65 ~ "Age: 55–64",
      age_w0 >= 65 ~ "Age: 65+"),

    ## Partisanship
    party_w0_cat = case_when(
      party_w0 %in% c(1, 2, 3) ~ "Democrat",
      party_w0 %in% c(4) ~ "Independent",
      party_w0 %in% c(5, 6, 7) ~ "Republican"),

    ## Interest (mean of the wave-2 and wave-3 measures, using whichever exist)
    int_politics_w2w3 = select(., int_politics_w2, int_politics_w3) %>%
      rowMeans(na.rm = TRUE),

    ## Ethnicity (values outside the listed categories stay NA)
    white_w0 = case_when(
      ethn_w0_fac == "White" ~ 1,
      ethn_w0_fac %in% c("Black", "Asian", "Native American",
                         "Prefer to self-describe") ~ 0),
    white_w0_fac = factor(case_when(
      white_w0 == 1 ~ "White",
      white_w0 == 0 ~ "Non-white")))

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# TRACE DATA  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Person-level totals per wave (visits, active days, distinct domains)
totals <- paste0(data_path,"traces/visits_totals.csv") %>%
  read.csv() %>%
  select(
    person_id,
    visits_u_w1, visits_u_w2, visits_u_w3,
    active_days_w1, active_days_w2, active_days_w3,
    count_domains_w1, count_domains_w2, count_domains_w3) %>%
  # TRUE only for people with at least 7 active days in every wave
  mutate(
    included =
      active_days_w1 >= 7 & active_days_w2 >= 7 & active_days_w3 >= 7)

# Visit-level data (one row per recorded visit)
trace_data <- read.csv(paste0(data_path, "traces/visits_local_news.csv"))

# Summarise trace data by person/wave/type of outlet and derive the exposure
# measures used in the analysis.
## Note that this data set still contains people who are later excluded via
## the 7-active-days rule.
trace_data_person <- trace_data %>%
  # Number of visits per person x outlet type x wave
  group_by(person_id, loc_nat, wave) %>%
  summarise(exposure = n(), .groups = "drop") %>%
  # One column per outlet type and wave, e.g. local_1, national_3
  pivot_wider(names_from = c(loc_nat, wave), values_from = exposure) %>%
  rename("loc_w1" = local_1, "loc_w2" = local_2, "loc_w3" = local_3,
         "nat_w1" = national_1, "nat_w2" = national_2, "nat_w3" = national_3) %>%
  select(person_id, loc_w1, loc_w2, loc_w3,
         nat_w1, nat_w2, nat_w3) %>%
  # Keep everybody from the totals file, including people without any
  # local/national news visit ...
  left_join(totals, ., by = "person_id") %>%
  # ... and set their missing counts to zero
  mutate(across(
    starts_with("loc") | starts_with("nat"),
    ~ replace_na(.x, 0))) %>%
  # Every *_log variable is log(x + 1), z-standardised with scale(); note that
  # scale() returns a one-column matrix
  mutate(
    # proportional measure: news visits as a share of visits_u_* per wave
    loc_w1_perc = loc_w1 / visits_u_w1,
    loc_w1_perc_log = scale(log(loc_w1_perc + 1)),
    loc_w2_perc = loc_w2 / visits_u_w2,
    loc_w2_perc_log = scale(log(loc_w2_perc + 1)),
    loc_w3_perc = loc_w3 / visits_u_w3,
    loc_w3_perc_log = scale(log(loc_w3_perc + 1)),
    nat_w1_perc = nat_w1 / visits_u_w1,
    nat_w1_perc_log = scale(log(nat_w1_perc + 1)),
    nat_w2_perc = nat_w2 / visits_u_w2,
    nat_w2_perc_log = scale(log(nat_w2_perc + 1)),
    nat_w3_perc = nat_w3 / visits_u_w3,
    nat_w3_perc_log = scale(log(nat_w3_perc + 1)),
    # pooled over waves: the numerator must cover the same waves as the
    # denominator (it previously added nat_w2 twice and left out nat_w3)
    nat_w1w2w3_perc = (nat_w1 + nat_w2 + nat_w3) /
      (visits_u_w1 + visits_u_w2 + visits_u_w3),
    nat_w1w2w3_perc_log = scale(log(nat_w1w2w3_perc + 1)),
    nat_w2w3_perc = (nat_w2 + nat_w3) /
      (visits_u_w2 + visits_u_w3),
    nat_w2w3_perc_log = scale(log(nat_w2w3_perc + 1)),
    # average measure: news visits per active day
    loc_w1_mean = loc_w1 / active_days_w1,
    loc_w1_mean_log = scale(log(loc_w1_mean + 1)),
    loc_w2_mean = loc_w2 / active_days_w2,
    loc_w2_mean_log = scale(log(loc_w2_mean + 1)),
    loc_w3_mean = loc_w3 / active_days_w3,
    loc_w3_mean_log = scale(log(loc_w3_mean + 1)),
    nat_w1_mean = nat_w1 / active_days_w1,
    nat_w1_mean_log = scale(log(nat_w1_mean + 1)),
    nat_w2_mean = nat_w2 / active_days_w2,
    nat_w2_mean_log = scale(log(nat_w2_mean + 1)),
    nat_w3_mean = nat_w3 / active_days_w3,
    nat_w3_mean_log = scale(log(nat_w3_mean + 1)),
    # pooled over waves (same numerator fix as for the proportional measure)
    nat_w1w2w3_mean = (nat_w1 + nat_w2 + nat_w3) /
      (active_days_w1 + active_days_w2 + active_days_w3),
    nat_w1w2w3_mean_log = scale(log(nat_w1w2w3_mean + 1)),
    nat_w2w3_mean = (nat_w2 + nat_w3) /
      (active_days_w2 + active_days_w3),
    nat_w2w3_mean_log = scale(log(nat_w2w3_mean + 1)),
    # total visits per active day
    visits_u_w1_log = scale(log((visits_u_w1 / active_days_w1) + 1)),
    visits_u_w2_log = scale(log((visits_u_w2 / active_days_w2) + 1)),
    visits_u_w3_log = scale(log((visits_u_w3 / active_days_w3) + 1)))
         
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# JOIN TRACE AND SURVEY DATA  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Attach the person-level trace measures to every survey respondent;
# respondents without trace data keep NA in the trace columns
survey_trace_data <- left_join(
  survey_data, trace_data_person,
  by = "person_id")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# SCALED INDEXES WITH FINAL DATA  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Sample medians of the participation scores, used as cut-offs below
participation_w2_med <- median(survey_data$participation_w2, na.rm = TRUE)
participation_w3_med <- median(survey_data$participation_w3, na.rm = TRUE)

survey_trace_data <- survey_trace_data %>%
  mutate(

    # participation: 1 if strictly above the median, 0 otherwise, NA if missing
    participation_w2_bin = as.numeric(participation_w2 > participation_w2_med),
    participation_w3_bin = as.numeric(participation_w3 > participation_w3_med),

    # Affective polarization: z-standardise every item that enters the index.
    # The raw item names are derived from ap_w*_vars by stripping "_stand", so
    # the standardised columns always match the index definition; an existing
    # "<item>_stand" column is overwritten.
    across(
      all_of(sub("_stand$", "", c(ap_w1_vars, ap_w2_vars, ap_w3_vars))),
      scale,
      .names = "{.col}_stand")) %>%

  # Per-wave index: mean of the available standardised items
  mutate(
    ap_w1 = rowMeans(select(., all_of(ap_w1_vars)), na.rm = TRUE),
    ap_w2 = rowMeans(select(., all_of(ap_w2_vars)), na.rm = TRUE),
    ap_w3 = rowMeans(select(., all_of(ap_w3_vars)), na.rm = TRUE))

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# FILTER DATA ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Wave-specific samples: at least 7 active days in the respective wave
# (rows with missing active days are dropped by filter())
final_data_w1 <- filter(survey_trace_data, active_days_w1 >= 7)
final_data_w2 <- filter(survey_trace_data, active_days_w2 >= 7)
final_data_w3 <- filter(survey_trace_data, active_days_w3 >= 7)

# Panel sample: at least 7 active days in every wave
final_data_allwaves <- filter(survey_trace_data, included)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# LONG DATA ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Reshape the all-waves sample to one row per person and wave
final_data_allwaves_l <- final_data_allwaves %>%
  # Keep only the variables needed in long format
  select(
    person_id,
    participation_w2, participation_w3,
    participation_w2_bin, participation_w3_bin,
    ap_w1, ap_w2, ap_w3,
    attextr_w1, attextr_w2, attextr_w3,
    polknow_w3,
    nat_w1_perc, nat_w2_perc, nat_w3_perc,
    loc_w1_perc, loc_w2_perc, loc_w3_perc,
    nat_w1_perc_log, nat_w2_perc_log, nat_w3_perc_log,
    loc_w1_perc_log, loc_w2_perc_log, loc_w3_perc_log,
    nat_w1_mean_log, nat_w2_mean_log, nat_w3_mean_log,
    loc_w1_mean_log, loc_w2_mean_log, loc_w3_mean_log,
    nat_w1w2w3_perc, nat_w1w2w3_perc_log,
    nat_w2w3_perc, nat_w2w3_perc_log,
    nat_w1w2w3_mean_log, nat_w2w3_mean_log,
    visits_u_w1_log, visits_u_w2_log, visits_u_w3_log,
    edu_high_w0, edu_high_w0_fac, white_w0_fac,
    int_politics_w2, int_politics_w3,
    int_politics_w2w3) %>%
  # Stack every wave-specific variable; the columns excluded here are
  # constant within person and stay as identifying columns
  pivot_longer(cols = -c(
    person_id,
    polknow_w3,
    nat_w1w2w3_perc, nat_w1w2w3_perc_log,
    nat_w2w3_perc, nat_w2w3_perc_log,
    nat_w1w2w3_mean_log, nat_w2w3_mean_log,
    edu_high_w0, edu_high_w0_fac, white_w0_fac,
    int_politics_w2w3)) %>%
  mutate(
    # Wave number taken from the "_w<k>" part of the original column name
    wave = case_when(
      grepl("w1", name) ~ 1,
      grepl("w2", name) ~ 2,
      grepl("w3", name) ~ 3),
    # Variable name without the wave suffix, e.g. "participation_bin"
    variable = gsub("_w1|_w2|_w3", "", name)) %>%
  select(-name) %>%
  # Spread the variables back out: one column per variable, one row per
  # person x wave
  pivot_wider(names_from = variable, values_from = value)
